cd ../..
/code
%run "source/config/notebook_settings.py"
import os

import mlflow
import numpy as np  # used below (np.unique / np.argmax) but was never imported here
import pandas as pd  # used below (pd.read_pickle) but was never imported here
from mlflow.tracking import MlflowClient

import helpsk as hlp
from helpsk.utility import read_pickle
from source.library.utilities import Timer, log_info, get_config
# Read the project configuration and open a client against the MLflow
# tracking server configured there.
config = get_config()
tracking_uri = config['MLFLOW']['URI']
log_info(f"MLFlow URI: {tracking_uri}")
client = MlflowClient(tracking_uri=tracking_uri)
2022-06-15 02:35:25 - INFO | MLFlow URI: http://mlflow_server:1235
# Fetch the model version currently registered in the 'Production' stage,
# then load the pickled model object from that version's run artifacts.
production_versions = client.get_latest_versions(
    name=config['MLFLOW']['MODEL_NAME'],
    stages=['Production'],
)
assert len(production_versions) == 1
production_model_info = production_versions[0]
production_model = read_pickle(
    client.download_artifacts(
        run_id=production_model_info.run_id,
        path='model/model.pkl',
    )
)
log_info(f"Production Model Version: {production_model_info.version}")
2022-06-15 02:35:25 - INFO | Production Model Version: 2
# Locate the experiment and its most recent run, then load that run's
# experiment results (YAML) and the best estimator found by the search.
credit_experiment = client.get_experiment_by_name(name=config['MLFLOW']['EXPERIMENT_NAME'])
runs = client.list_run_infos(experiment_id=credit_experiment.experiment_id)
# Most recent run by start time; stdlib max(key=) replaces np.argmax indexing
# (numpy is not imported by this script's visible import block).
latest_run = max(runs, key=lambda run: run.start_time)
yaml_path = client.download_artifacts(run_id=latest_run.run_id, path='experiment.yaml')
results = hlp.sklearn_eval.MLExperimentResults.from_yaml_file(yaml_file_name=yaml_path)
# Best estimator from the BayesSearchCV of the latest run.
best_estimator = read_pickle(client.download_artifacts(
    run_id=latest_run.run_id,
    path='model/model.pkl'
))
best_estimator.model
Pipeline(steps=[('prep',
ColumnTransformer(transformers=[('numeric',
Pipeline(steps=[('imputer',
TransformerChooser(transformer=SimpleImputer(strategy='median'))),
('scaler',
TransformerChooser()),
('pca',
TransformerChooser(transformer=PCA(n_components='mle')))]),
['duration', 'credit_amount',
'installment_commitment',
'residence_since', 'age',
'existing_credi...
'employment',
'personal_status',
'other_parties',
'property_magnitude',
'other_payment_plans',
'housing', 'job',
'own_telephone',
'foreign_worker'])])),
('model',
RandomForestClassifier(criterion='entropy', max_depth=99,
max_features=0.031837350792579364,
max_samples=0.9248344222191298,
min_samples_leaf=4,
min_samples_split=16, n_estimators=1235,
random_state=42))])In a Jupyter environment, please rerun this cell to show the HTML representation or trust the notebook. Pipeline(steps=[('prep',
ColumnTransformer(transformers=[('numeric',
Pipeline(steps=[('imputer',
TransformerChooser(transformer=SimpleImputer(strategy='median'))),
('scaler',
TransformerChooser()),
('pca',
TransformerChooser(transformer=PCA(n_components='mle')))]),
['duration', 'credit_amount',
'installment_commitment',
'residence_since', 'age',
'existing_credi...
'employment',
'personal_status',
'other_parties',
'property_magnitude',
'other_payment_plans',
'housing', 'job',
'own_telephone',
'foreign_worker'])])),
('model',
RandomForestClassifier(criterion='entropy', max_depth=99,
max_features=0.031837350792579364,
max_samples=0.9248344222191298,
min_samples_leaf=4,
min_samples_split=16, n_estimators=1235,
random_state=42))])ColumnTransformer(transformers=[('numeric',
Pipeline(steps=[('imputer',
TransformerChooser(transformer=SimpleImputer(strategy='median'))),
('scaler',
TransformerChooser()),
('pca',
TransformerChooser(transformer=PCA(n_components='mle')))]),
['duration', 'credit_amount',
'installment_commitment', 'residence_since',
'age', 'existing_credits',
'num_dependents']),
('non_numeric',
Pipeline(steps=[('encoder',
TransformerChooser(transformer=CustomOrdinalEncoder()))]),
['checking_status', 'credit_history',
'purpose', 'savings_status', 'employment',
'personal_status', 'other_parties',
'property_magnitude', 'other_payment_plans',
'housing', 'job', 'own_telephone',
'foreign_worker'])])['duration', 'credit_amount', 'installment_commitment', 'residence_since', 'age', 'existing_credits', 'num_dependents']
TransformerChooser(transformer=SimpleImputer(strategy='median'))
SimpleImputer(strategy='median')
SimpleImputer(strategy='median')
TransformerChooser()
TransformerChooser(transformer=PCA(n_components='mle'))
PCA(n_components='mle')
PCA(n_components='mle')
['checking_status', 'credit_history', 'purpose', 'savings_status', 'employment', 'personal_status', 'other_parties', 'property_magnitude', 'other_payment_plans', 'housing', 'job', 'own_telephone', 'foreign_worker']
TransformerChooser(transformer=CustomOrdinalEncoder())
CustomOrdinalEncoder()
CustomOrdinalEncoder()
RandomForestClassifier(criterion='entropy', max_depth=99,
max_features=0.031837350792579364,
max_samples=0.9248344222191298, min_samples_leaf=4,
min_samples_split=16, n_estimators=1235,
random_state=42)client.download_artifacts(run_id=latest_run.run_id, path='x_train.pkl')
'/code/mlflow-artifact-root/1/1dd8223fdb144295a0603f0c985c23bc/artifacts/x_train.pkl'
# Load the train/test splits that were logged as artifacts of the latest run.
# `pd.read_pickle` is the public API; the original `pd.pandas.read_pickle`
# worked only via an accidental self-attribute of the pandas package.
with Timer("Loading training/test datasets"):
    X_train = pd.read_pickle(client.download_artifacts(run_id=latest_run.run_id, path='x_train.pkl'))
    X_test = pd.read_pickle(client.download_artifacts(run_id=latest_run.run_id, path='x_test.pkl'))
    y_train = pd.read_pickle(client.download_artifacts(run_id=latest_run.run_id, path='y_train.pkl'))
    y_test = pd.read_pickle(client.download_artifacts(run_id=latest_run.run_id, path='y_test.pkl'))
2022-06-15 02:35:25 - INFO | *****Timer Started: Loading training/test datasets 2022-06-15 02:35:25 - INFO | *****Timer Finished (0.02 seconds)
# Sanity-check the split sizes (same four values logged in the same order).
for split_info in (X_train.shape, len(y_train), X_test.shape, len(y_test)):
    log_info(split_info)
2022-06-15 02:35:25 - INFO | (800, 20) 2022-06-15 02:35:25 - INFO | 800 2022-06-15 02:35:25 - INFO | (200, 20) 2022-06-15 02:35:25 - INFO | 200
# Counts of each class label in the training set.
np.unique(y_train, return_counts=True)
(array([0, 1]), array([559, 241]))
# Training-set class proportions; compute the unique counts once instead of twice.
train_counts = np.unique(y_train, return_counts=True)[1]
train_counts / np.sum(train_counts)
array([0.69875, 0.30125])
# Test-set class proportions; compute the unique counts once instead of twice.
test_counts = np.unique(y_test, return_counts=True)[1]
test_counts / np.sum(test_counts)
array([0.705, 0.295])
# Cross-validated score of the best trial in the search results.
log_info(f"Best Score: {results.best_score}")
2022-06-15 02:35:25 - INFO | Best Score: 0.7741159200200212
# Hyperparameters of the best trial in the search results.
log_info(f"Best Params: {results.best_params}")
2022-06-15 02:35:25 - INFO | Best Params: {'model': 'RandomForestClassifier()', 'max_features': 0.031837350792579364, 'max_depth': 99, 'n_estimators': 1235, 'min_samples_split': 16, 'min_samples_leaf': 4, 'max_samples': 0.9248344222191298, 'criterion': 'entropy', 'imputer': "SimpleImputer(strategy='median')", 'scaler': 'None', 'pca': "PCA('mle')", 'encoder': 'CustomOrdinalEncoder()'}
# Best trial from each model-type: rank trials within each model by mean
# roc_auc, then keep only the top-ranked row per model.
df = results.to_formatted_dataframe(return_style=False, include_rank=True)
df["model_rank"] = (
    df.groupby("model")["roc_auc Mean"].rank(method="first", ascending=False)
)
df[df["model_rank"] == 1]
| rank | roc_auc Mean | roc_auc 95CI.LO | roc_auc 95CI.HI | model | C | max_features | max_depth | n_estimators | min_samples_split | min_samples_leaf | max_samples | criterion | learning_rate | min_child_weight | subsample | colsample_bytree | colsample_bylevel | reg_alpha | reg_lambda | imputer | scaler | pca | encoder | model_rank | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 11 | 1 | 0.77 | 0.74 | 0.81 | RandomForestClassifier() | NaN | 0.03 | 99.00 | 1235.00 | 16.00 | 4.00 | 0.92 | entropy | NaN | NaN | NaN | NaN | NaN | NaN | NaN | SimpleImputer(strategy='median') | None | PCA('mle') | CustomOrdinalEncoder() | 1.00 |
| 2 | 3 | 0.76 | 0.71 | 0.81 | LogisticRegression() | 0.00 | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | SimpleImputer(strategy='median') | MinMaxScaler() | PCA('mle') | OneHotEncoder() | 1.00 |
| 7 | 6 | 0.75 | 0.71 | 0.80 | ExtraTreesClassifier() | NaN | 0.14 | 40.00 | 1489.00 | 41.00 | 27.00 | 0.94 | entropy | NaN | NaN | NaN | NaN | NaN | NaN | NaN | SimpleImputer(strategy='median') | None | None | OneHotEncoder() | 1.00 |
| 18 | 10 | 0.75 | 0.70 | 0.79 | XGBClassifier() | NaN | NaN | 5.00 | 1157.00 | NaN | NaN | NaN | NaN | 0.02 | 3.00 | 0.69 | 0.50 | 0.73 | 0.03 | 2.91 | SimpleImputer(strategy='median') | None | PCA('mle') | CustomOrdinalEncoder() | 1.00 |
# Styled leaderboard of all trials.
results.to_formatted_dataframe(
    include_rank=True,
    num_rows=500,
    return_style=True,
)
| rank | roc_auc Mean | roc_auc 95CI.LO | roc_auc 95CI.HI | model | C | max_features | max_depth | n_estimators | min_samples_split | min_samples_leaf | max_samples | criterion | learning_rate | min_child_weight | subsample | colsample_bytree | colsample_bylevel | reg_alpha | reg_lambda | imputer | scaler | pca | encoder |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 1 | 0.774 | 0.742 | 0.806 | RandomForestClassifier() | <NA> | 0.032 | 99.000 | 1,235.000 | 16.000 | 4.000 | 0.925 | entropy | <NA> | <NA> | <NA> | <NA> | <NA> | <NA> | <NA> | SimpleImputer(strategy='median') | None | PCA('mle') | CustomOrdinalEncoder() |
| 2 | 0.761 | 0.702 | 0.820 | RandomForestClassifier() | <NA> | <NA> | <NA> | <NA> | <NA> | <NA> | <NA> | <NA> | <NA> | <NA> | <NA> | <NA> | <NA> | <NA> | <NA> | SimpleImputer() | None | None | OneHotEncoder() |
| 3 | 0.759 | 0.711 | 0.807 | LogisticRegression() | 0.000 | <NA> | <NA> | <NA> | <NA> | <NA> | <NA> | <NA> | <NA> | <NA> | <NA> | <NA> | <NA> | <NA> | <NA> | SimpleImputer(strategy='median') | MinMaxScaler() | PCA('mle') | OneHotEncoder() |
| 4 | 0.759 | 0.712 | 0.806 | LogisticRegression() | 0.000 | <NA> | <NA> | <NA> | <NA> | <NA> | <NA> | <NA> | <NA> | <NA> | <NA> | <NA> | <NA> | <NA> | <NA> | SimpleImputer(strategy='most_frequent') | MinMaxScaler() | None | OneHotEncoder() |
| 5 | 0.757 | 0.711 | 0.803 | LogisticRegression() | <NA> | <NA> | <NA> | <NA> | <NA> | <NA> | <NA> | <NA> | <NA> | <NA> | <NA> | <NA> | <NA> | <NA> | <NA> | SimpleImputer() | StandardScaler() | None | OneHotEncoder() |
| 6 | 0.753 | 0.707 | 0.800 | ExtraTreesClassifier() | <NA> | 0.137 | 40.000 | 1,489.000 | 41.000 | 27.000 | 0.944 | entropy | <NA> | <NA> | <NA> | <NA> | <NA> | <NA> | <NA> | SimpleImputer(strategy='median') | None | None | OneHotEncoder() |
| 7 | 0.751 | 0.702 | 0.800 | ExtraTreesClassifier() | <NA> | 0.784 | 4.000 | 1,324.000 | 47.000 | 4.000 | 0.864 | entropy | <NA> | <NA> | <NA> | <NA> | <NA> | <NA> | <NA> | SimpleImputer() | None | PCA('mle') | OneHotEncoder() |
| 8 | 0.749 | 0.724 | 0.774 | ExtraTreesClassifier() | <NA> | 0.563 | 71.000 | 1,725.000 | 49.000 | 16.000 | 0.956 | gini | <NA> | <NA> | <NA> | <NA> | <NA> | <NA> | <NA> | SimpleImputer(strategy='median') | None | None | CustomOrdinalEncoder() |
| 9 | 0.748 | 0.702 | 0.793 | LogisticRegression() | 95.634 | <NA> | <NA> | <NA> | <NA> | <NA> | <NA> | <NA> | <NA> | <NA> | <NA> | <NA> | <NA> | <NA> | <NA> | SimpleImputer(strategy='median') | MinMaxScaler() | None | OneHotEncoder() |
| 10 | 0.747 | 0.704 | 0.789 | XGBClassifier() | <NA> | <NA> | 5.000 | 1,157.000 | <NA> | <NA> | <NA> | <NA> | 0.018 | 3.000 | 0.694 | 0.501 | 0.726 | 0.033 | 2.910 | SimpleImputer(strategy='median') | None | PCA('mle') | CustomOrdinalEncoder() |
| 11 | 0.746 | 0.698 | 0.794 | ExtraTreesClassifier() | <NA> | <NA> | <NA> | <NA> | <NA> | <NA> | <NA> | <NA> | <NA> | <NA> | <NA> | <NA> | <NA> | <NA> | <NA> | SimpleImputer() | None | None | OneHotEncoder() |
| 12 | 0.746 | 0.700 | 0.791 | RandomForestClassifier() | <NA> | 0.869 | 98.000 | 1,537.000 | 15.000 | 4.000 | 0.622 | gini | <NA> | <NA> | <NA> | <NA> | <NA> | <NA> | <NA> | SimpleImputer(strategy='median') | None | None | CustomOrdinalEncoder() |
| 13 | 0.745 | 0.703 | 0.787 | RandomForestClassifier() | <NA> | 0.757 | 44.000 | 745.000 | 33.000 | 6.000 | 0.608 | gini | <NA> | <NA> | <NA> | <NA> | <NA> | <NA> | <NA> | SimpleImputer() | None | None | CustomOrdinalEncoder() |
| 14 | 0.742 | 0.691 | 0.793 | XGBClassifier() | <NA> | <NA> | 2.000 | 1,671.000 | <NA> | <NA> | <NA> | <NA> | 0.021 | 2.000 | 0.657 | 0.591 | 0.780 | 0.026 | 3.081 | SimpleImputer(strategy='median') | None | None | OneHotEncoder() |
| 15 | 0.740 | 0.718 | 0.761 | XGBClassifier() | <NA> | <NA> | 9.000 | 551.000 | <NA> | <NA> | <NA> | <NA> | 0.044 | 20.000 | 0.675 | 0.837 | 0.550 | 0.018 | 3.292 | SimpleImputer(strategy='median') | None | None | CustomOrdinalEncoder() |
| 16 | 0.738 | 0.727 | 0.748 | ExtraTreesClassifier() | <NA> | 0.861 | 52.000 | 1,995.000 | 33.000 | 19.000 | 0.651 | gini | <NA> | <NA> | <NA> | <NA> | <NA> | <NA> | <NA> | SimpleImputer(strategy='most_frequent') | None | None | CustomOrdinalEncoder() |
| 17 | 0.737 | 0.716 | 0.757 | RandomForestClassifier() | <NA> | 0.528 | 70.000 | 1,003.000 | 37.000 | 19.000 | 0.530 | gini | <NA> | <NA> | <NA> | <NA> | <NA> | <NA> | <NA> | SimpleImputer(strategy='median') | None | PCA('mle') | CustomOrdinalEncoder() |
| 18 | 0.723 | 0.705 | 0.741 | LogisticRegression() | 0.000 | <NA> | <NA> | <NA> | <NA> | <NA> | <NA> | <NA> | <NA> | <NA> | <NA> | <NA> | <NA> | <NA> | <NA> | SimpleImputer(strategy='median') | StandardScaler() | None | CustomOrdinalEncoder() |
| 19 | 0.714 | 0.648 | 0.779 | XGBClassifier() | <NA> | <NA> | <NA> | <NA> | <NA> | <NA> | <NA> | <NA> | <NA> | <NA> | <NA> | <NA> | <NA> | <NA> | <NA> | SimpleImputer() | None | None | OneHotEncoder() |
| 20 | 0.712 | 0.704 | 0.719 | XGBClassifier() | <NA> | <NA> | 16.000 | 1,642.000 | <NA> | <NA> | <NA> | <NA> | 0.071 | 27.000 | 0.899 | 0.908 | 0.592 | 0.001 | 1.009 | SimpleImputer(strategy='median') | None | PCA('mle') | CustomOrdinalEncoder() |
# Random-forest trials only, ranked.
results.to_formatted_dataframe(
    include_rank=True,
    query='model == "RandomForestClassifier()"',
)
| rank | roc_auc Mean | roc_auc 95CI.LO | roc_auc 95CI.HI | max_features | max_depth | n_estimators | min_samples_split | min_samples_leaf | max_samples | criterion | imputer | pca | encoder |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 1 | 0.774 | 0.742 | 0.806 | 0.032 | 99.000 | 1,235.000 | 16.000 | 4.000 | 0.925 | entropy | SimpleImputer(strategy='median') | PCA('mle') | CustomOrdinalEncoder() |
| 2 | 0.761 | 0.702 | 0.820 | <NA> | <NA> | <NA> | <NA> | <NA> | <NA> | <NA> | SimpleImputer() | None | OneHotEncoder() |
| 3 | 0.746 | 0.700 | 0.791 | 0.869 | 98.000 | 1,537.000 | 15.000 | 4.000 | 0.622 | gini | SimpleImputer(strategy='median') | None | CustomOrdinalEncoder() |
| 4 | 0.745 | 0.703 | 0.787 | 0.757 | 44.000 | 745.000 | 33.000 | 6.000 | 0.608 | gini | SimpleImputer() | None | CustomOrdinalEncoder() |
| 5 | 0.737 | 0.716 | 0.757 | 0.528 | 70.000 | 1,003.000 | 37.000 | 19.000 | 0.530 | gini | SimpleImputer(strategy='median') | PCA('mle') | CustomOrdinalEncoder() |
# Logistic-regression trials only, ranked.
results.to_formatted_dataframe(
    include_rank=True,
    query='model == "LogisticRegression()"',
)
| rank | roc_auc Mean | roc_auc 95CI.LO | roc_auc 95CI.HI | C | imputer | scaler | pca | encoder |
|---|---|---|---|---|---|---|---|---|
| 1 | 0.759 | 0.711 | 0.807 | 0.000 | SimpleImputer(strategy='median') | MinMaxScaler() | PCA('mle') | OneHotEncoder() |
| 2 | 0.759 | 0.712 | 0.806 | 0.000 | SimpleImputer(strategy='most_frequent') | MinMaxScaler() | None | OneHotEncoder() |
| 3 | 0.757 | 0.711 | 0.803 | <NA> | SimpleImputer() | StandardScaler() | None | OneHotEncoder() |
| 4 | 0.748 | 0.702 | 0.793 | 95.634 | SimpleImputer(strategy='median') | MinMaxScaler() | None | OneHotEncoder() |
| 5 | 0.723 | 0.705 | 0.741 | 0.000 | SimpleImputer(strategy='median') | StandardScaler() | None | CustomOrdinalEncoder() |
# Performance across trials: faceted by model, then filtered to the random
# forest alone. The filter string is hoisted so it is stated once.
rf_query = 'model == "RandomForestClassifier()"'
results.plot_performance_across_trials(facet_by='model').show()
results.plot_performance_across_trials(query=rf_query).show()
results.plot_parameter_values_across_trials(query=rf_query).show()
# results.plot_scatter_matrix(query=rf_query, height=1000, width=1000).show()
results.plot_performance_numeric_params(query=rf_query, height=800)
/usr/local/lib/python3.9/site-packages/statsmodels/nonparametric/smoothers_lowess.py:227: RuntimeWarning: invalid value encountered in true_divide
# Random-forest hyperparameter diagnostics; the filter string is hoisted so
# it is stated once.
rf_query = 'model == "RandomForestClassifier()"'
results.plot_parallel_coordinates(query=rf_query).show()
results.plot_performance_non_numeric_params(query=rf_query).show()
results.plot_score_vs_parameter(
    query=rf_query,
    parameter='max_features',
    size='max_depth',
    color='encoder',
)
# results.plot_parameter_vs_parameter(
#     query='model == "XGBClassifier()"',
#     parameter_x='colsample_bytree',
#     parameter_y='learning_rate',
#     size='max_depth'
# )
# results.plot_parameter_vs_parameter(
#     query='model == "XGBClassifier()"',
#     parameter_x='colsample_bytree',
#     parameter_y='learning_rate',
#     size='imputer'
# )
# Score the holdout set with the best estimator and peek at the first scores.
test_predictions = best_estimator.predict(X_test)
test_predictions[:10]
array([0.34417795, 0.36143173, 0.46583925, 0.33332345, 0.21120017,
0.27141023, 0.17140721, 0.40142613, 0.17645289, 0.22338202])
# Evaluate the holdout predictions at a score threshold of 0.37, comparing
# against 'prior' and 'constant' dummy-classifier baselines.
evaluator = hlp.sklearn_eval.TwoClassEvaluator(
    actual_values=y_test,
    predicted_scores=test_predictions,
    score_threshold=0.37,
)
evaluator.plot_actual_vs_predict_histogram()
evaluator.plot_confusion_matrix()
evaluator.all_metrics_df(
    dummy_classifier_strategy=['prior', 'constant'],
    round_by=3,
    return_style=True,
)
| Score | Dummy (prior) | Dummy (constant) | Explanation | |
|---|---|---|---|---|
| AUC | 0.783 | 0.500 | 0.500 | Area under the ROC curve (true pos. rate vs false pos. rate); ranges from 0.5 (purely random classifier) to 1.0 (perfect classifier) |
| True Positive Rate | 0.525 | 0.000 | 1.000 | 52.5% of positive instances were correctly identified.; i.e. 31 "Positive Class" labels were correctly identified out of 59 instances; a.k.a Sensitivity/Recall |
| True Negative Rate | 0.851 | 1.000 | 0.000 | 85.1% of negative instances were correctly identified.; i.e. 120 "Negative Class" labels were correctly identified out of 141 instances |
| False Positive Rate | 0.149 | 0.000 | 1.000 | 14.9% of negative instances were incorrectly identified as positive; i.e. 21 "Negative Class" labels were incorrectly identified as "Positive Class", out of 141 instances |
| False Negative Rate | 0.475 | 1.000 | 0.000 | 47.5% of positive instances were incorrectly identified as negative; i.e. 28 "Positive Class" labels were incorrectly identified as "Negative Class", out of 59 instances |
| Positive Predictive Value | 0.596 | 0.000 | 0.295 | When the model claims an instance is positive, it is correct 59.6% of the time; i.e. out of the 52 times the model predicted "Positive Class", it was correct 31 times; a.k.a precision |
| Negative Predictive Value | 0.811 | 0.705 | 0.000 | When the model claims an instance is negative, it is correct 81.1% of the time; i.e. out of the 148 times the model predicted "Negative Class", it was correct 120 times |
| F1 Score | 0.559 | 0.000 | 0.456 | The F1 score can be interpreted as a weighted average of the precision and recall, where an F1 score reaches its best value at 1 and worst score at 0. |
| Precision/Recall AUC | 0.618 | 0.295 | 0.295 | Precision/Recall AUC is calculated with `average_precision` which summarizes a precision-recall curve as the weighted mean of precisions achieved at each threshold. See sci-kit learn documentation for caveats. |
| Accuracy | 0.755 | 0.705 | 0.295 | 75.5% of instances were correctly identified |
| Error Rate | 0.245 | 0.295 | 0.705 | 24.5% of instances were incorrectly identified |
| % Positive | 0.295 | 0.295 | 0.295 | 29.5% of the data are positive; i.e. out of 200 total observations; 59 are labeled as "Positive Class" |
| Total Observations | 200 | 200 | 200 | There are 200 total observations; i.e. sample size |
# ROC curve for the holdout predictions.
evaluator.plot_roc_auc_curve().show()
<Figure size 720x444.984 with 0 Axes>
# Precision/recall curve, threshold sweep, precision-recall tradeoff,
# and the lift/gain table.
evaluator.plot_precision_recall_auc_curve().show()
evaluator.plot_threshold_curves(score_threshold_range=(0.1, 0.7)).show()
evaluator.plot_precision_recall_tradeoff(score_threshold_range=(0.1, 0.6)).show()
evaluator.calculate_lift_gain(return_style=True)
| Gain | Lift | |
|---|---|---|
| Percentile | ||
| 5 | 0.14 | 2.71 |
| 10 | 0.22 | 2.20 |
| 15 | 0.32 | 2.15 |
| 20 | 0.44 | 2.20 |
| 25 | 0.51 | 2.03 |
| 30 | 0.58 | 1.92 |
| 35 | 0.64 | 1.84 |
| 40 | 0.71 | 1.78 |
| 45 | 0.76 | 1.69 |
| 50 | 0.83 | 1.66 |
| 55 | 0.85 | 1.54 |
| 60 | 0.86 | 1.44 |
| 65 | 0.86 | 1.33 |
| 70 | 0.88 | 1.26 |
| 75 | 0.92 | 1.22 |
| 80 | 0.97 | 1.21 |
| 85 | 0.97 | 1.14 |
| 90 | 1.00 | 1.11 |
| 95 | 1.00 | 1.05 |
| 100 | 1.00 | 1.00 |
# Score the holdout set with the production model and peek at the first scores.
test_predictions = production_model.predict(X_test)
test_predictions[:10]
array([0.34417795, 0.36143173, 0.46583925, 0.33332345, 0.21120017,
0.27141023, 0.17140721, 0.40142613, 0.17645289, 0.22338202])
# Evaluate the production model's holdout predictions at the same 0.37
# threshold used for the best estimator above.
evaluator = hlp.sklearn_eval.TwoClassEvaluator(
    actual_values=y_test,
    predicted_scores=test_predictions,
    score_threshold=0.37,
)
evaluator.plot_actual_vs_predict_histogram()
evaluator.plot_confusion_matrix()
evaluator.all_metrics_df(
    dummy_classifier_strategy=['prior', 'constant'],
    round_by=3,
    return_style=True,
)
| Score | Dummy (prior) | Dummy (constant) | Explanation | |
|---|---|---|---|---|
| AUC | 0.783 | 0.500 | 0.500 | Area under the ROC curve (true pos. rate vs false pos. rate); ranges from 0.5 (purely random classifier) to 1.0 (perfect classifier) |
| True Positive Rate | 0.525 | 0.000 | 1.000 | 52.5% of positive instances were correctly identified.; i.e. 31 "Positive Class" labels were correctly identified out of 59 instances; a.k.a Sensitivity/Recall |
| True Negative Rate | 0.851 | 1.000 | 0.000 | 85.1% of negative instances were correctly identified.; i.e. 120 "Negative Class" labels were correctly identified out of 141 instances |
| False Positive Rate | 0.149 | 0.000 | 1.000 | 14.9% of negative instances were incorrectly identified as positive; i.e. 21 "Negative Class" labels were incorrectly identified as "Positive Class", out of 141 instances |
| False Negative Rate | 0.475 | 1.000 | 0.000 | 47.5% of positive instances were incorrectly identified as negative; i.e. 28 "Positive Class" labels were incorrectly identified as "Negative Class", out of 59 instances |
| Positive Predictive Value | 0.596 | 0.000 | 0.295 | When the model claims an instance is positive, it is correct 59.6% of the time; i.e. out of the 52 times the model predicted "Positive Class", it was correct 31 times; a.k.a precision |
| Negative Predictive Value | 0.811 | 0.705 | 0.000 | When the model claims an instance is negative, it is correct 81.1% of the time; i.e. out of the 148 times the model predicted "Negative Class", it was correct 120 times |
| F1 Score | 0.559 | 0.000 | 0.456 | The F1 score can be interpreted as a weighted average of the precision and recall, where an F1 score reaches its best value at 1 and worst score at 0. |
| Precision/Recall AUC | 0.618 | 0.295 | 0.295 | Precision/Recall AUC is calculated with `average_precision` which summarizes a precision-recall curve as the weighted mean of precisions achieved at each threshold. See sci-kit learn documentation for caveats. |
| Accuracy | 0.755 | 0.705 | 0.295 | 75.5% of instances were correctly identified |
| Error Rate | 0.245 | 0.295 | 0.705 | 24.5% of instances were incorrectly identified |
| % Positive | 0.295 | 0.295 | 0.295 | 29.5% of the data are positive; i.e. out of 200 total observations; 59 are labeled as "Positive Class" |
| Total Observations | 200 | 200 | 200 | There are 200 total observations; i.e. sample size |
# ROC curve for the production model's holdout predictions.
evaluator.plot_roc_auc_curve().show()
<Figure size 720x444.984 with 0 Axes>
# Precision/recall curve, threshold sweep, precision-recall tradeoff,
# and the lift/gain table for the production model.
evaluator.plot_precision_recall_auc_curve().show()
evaluator.plot_threshold_curves(score_threshold_range=(0.1, 0.7)).show()
evaluator.plot_precision_recall_tradeoff(score_threshold_range=(0.1, 0.6)).show()
evaluator.calculate_lift_gain(return_style=True)
| Gain | Lift | |
|---|---|---|
| Percentile | ||
| 5 | 0.14 | 2.71 |
| 10 | 0.22 | 2.20 |
| 15 | 0.32 | 2.15 |
| 20 | 0.44 | 2.20 |
| 25 | 0.51 | 2.03 |
| 30 | 0.58 | 1.92 |
| 35 | 0.64 | 1.84 |
| 40 | 0.71 | 1.78 |
| 45 | 0.76 | 1.69 |
| 50 | 0.83 | 1.66 |
| 55 | 0.85 | 1.54 |
| 60 | 0.86 | 1.44 |
| 65 | 0.86 | 1.33 |
| 70 | 0.88 | 1.26 |
| 75 | 0.92 | 1.22 |
| 80 | 0.97 | 1.21 |
| 85 | 0.97 | 1.14 |
| 90 | 1.00 | 1.11 |
| 95 | 1.00 | 1.05 |
| 100 | 1.00 | 1.00 |